Since 2008, guests and hosts have used Airbnb to expand their travel possibilities and to offer a more unique, personalized way of experiencing the world. This dataset describes the listing activity and metrics in NYC, NY for 2019. The data file includes all the information needed to learn more about hosts and geographical availability, along with the metrics necessary to make predictions and draw conclusions.
# Load the 2019 NYC listings and move the columns named below to the front,
# in this order. Any column not named here (`price` in the preview output)
# follows after them.
ab_nyc <- relocate(
  read.csv("../../_data/AB_NYC_2019.csv"),
  c(
    "latitude", "longitude", "id", "name", "host_id", "host_name",
    "neighbourhood_group", "neighbourhood", "room_type", "minimum_nights",
    "number_of_reviews", "last_review", "reviews_per_month",
    "calculated_host_listings_count", "availability_365"
  )
)
# Preview the first rows to confirm the new column order.
head(ab_nyc)
latitude longitude id
1 40.64749 -73.97237 2539
2 40.75362 -73.98377 2595
3 40.80902 -73.94190 3647
4 40.68514 -73.95976 3831
5 40.79851 -73.94399 5022
6 40.74767 -73.97500 5099
name host_id
1 Clean & quiet apt home by the park 2787
2 Skylit Midtown Castle 2845
3 THE VILLAGE OF HARLEM....NEW YORK ! 4632
4 Cozy Entire Floor of Brownstone 4869
5 Entire Apt: Spacious Studio/Loft by central park 7192
6 Large Cozy 1 BR Apartment In Midtown East 7322
host_name neighbourhood_group neighbourhood room_type
1 John Brooklyn Kensington Private room
2 Jennifer Manhattan Midtown Entire home/apt
3 Elisabeth Manhattan Harlem Private room
4 LisaRoxanne Brooklyn Clinton Hill Entire home/apt
5 Laura Manhattan East Harlem Entire home/apt
6 Chris Manhattan Murray Hill Entire home/apt
minimum_nights number_of_reviews last_review reviews_per_month
1 1 9 2018-10-19 0.21
2 1 45 2019-05-21 0.38
3 3 0 NA
4 1 270 2019-07-05 4.64
5 10 9 2018-11-19 0.10
6 3 74 2019-06-22 0.59
calculated_host_listings_count availability_365 price
1 6 365 149
2 2 355 225
3 1 365 150
4 1 194 89
5 1 0 80
6 1 129 200
# Minimum cost of a stay = minimum number of nights x price, one value per
# listing. The lon/lat columns are consumed by st_as_sf() as point coordinates.
# `check.names = FALSE` keeps the column name with spaces intact.
min_cost <- data.frame(
  lat = ab_nyc$latitude,
  lon = ab_nyc$longitude,
  `Minimum Cost of Stay` = ab_nyc$minimum_nights * ab_nyc$price,
  check.names = FALSE
) %>%
  st_as_sf(
    coords = c("lon", "lat"),
    crs = "+proj=longlat +datum=WGS84 +ellps=WGS84 +towgs84=0,0,0"
  )
# Distribution of the minimum cost, plus the geometry type counts.
summary(min_cost)
Minimum Cost of Stay geometry
Min. : 0.0 POINT :48895
1st Qu.: 135.0 epsg:NA : 0
Median : 300.0 +proj=long...: 0
Mean : 1284.4
3rd Qu.: 734.5
Max. :1170000.0
# Keep the summary table available for any later code that reads `info`.
info <- summary(min_cost)
# Compute the quartile cut-offs from the data rather than parsing them out of
# the summary() text. Matching "[0-9]+" against cells such as
# "1st Qu.: 135.0" or "3rd Qu.: 734.5" returns the leading digit of the row
# label (1 and 3), not the quartile, and would drop decimals in any case.
cost_breaks <- quantile(
  min_cost$`Minimum Cost of Stay`,
  probs = c(0.25, 0.5, 0.75),
  na.rm = TRUE,
  names = FALSE
)
# Affordability tier by quartile of minimum cost:
#   1 = at or below Q1 (cheapest), 2 = (Q1, median], 3 = (median, Q3],
#   4 = above Q3 (most expensive). Rows with a missing cost stay NA.
min_cost <- min_cost %>%
  mutate(Affordability = case_when(
    `Minimum Cost of Stay` <= cost_breaks[1] ~ 1,
    `Minimum Cost of Stay` <= cost_breaks[2] ~ 2,
    `Minimum Cost of Stay` <= cost_breaks[3] ~ 3,
    `Minimum Cost of Stay` > cost_breaks[3] ~ 4
  ))